

* +++++++++++++++++++++
* CLEAN INTEGRATION
* SPORTS CLUBS
* +++++++++++++++++++++

/*
Stützpunktvereine Integration durch Sport
*/

* globals 
global data_raw_sport "${data_raw}/integration_sports_clubs/"

import excel "${data_raw_sport}/stützpunktvereine.xlsx", clear firstrow

drop D

* give meaningful names to variables
ren (Name PLZ Ort) (club_name plz place_name)
destring plz, replace force

* crosswalk PLZ to Kreis
merge m:1 plz using "${data_derived}/cw_krs_plz_clean.dta", keep(1 3) assert(1 2 3) 

preserve
	keep if _merge ==3 
	drop _merge
	tempfile first_round
	save `first_round'
restore
	
* if PLZ based assignment to Kreis doesn't work, try it based on names
keep if _merge ==1 
drop ags
bys place_name : keep if _n==1 

preserve
	use "${data_derived}/cw_krs_plz_clean.dta", clear
	bys place_name ags : keep if _n==1
	tempfile cw_place_name_ags
	save `cw_place_name_ags'
restore

merge 1:m place_name using `cw_place_name_ags', keep(3) nogen

keep club_name plz place_name ags
drop if ags ==. 
drop if club_name =="SV Warnow 90 e.V." & ags ==13072 // otherwise duplicate

* combine the two approaces
append using `first_round'

sort ags club_name 

save "${data_derived}/integration_durch_sport_clubs.dta", replace
export delimited "${data_derived}/integration_durch_sport_clubs.csv", replace
